import json
import re

import openpyxl
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
}

# Page whose table rows are scraped by the rest of the script.
url = 'http://www.teandy.com/pal/index.html'

# requests has no default timeout; without one a stalled server would block
# the script indefinitely.
resp = requests.get(url, headers=headers, timeout=30)
# Fail fast on a 4xx/5xx answer instead of parsing an error page as data.
resp.raise_for_status()
# Decode the body as UTF-8 regardless of the charset requests guessed.
resp.encoding = 'utf-8'

soup = BeautifulSoup(resp.text, 'lxml')
# Every <tr> except the first one (presumably the table header -- confirm
# against the page) is treated as one data record.
tr_lst = soup.find_all('tr')[1:]

# Text removed from every extracted value.
# NOTE(review): this literal is not meant to be empty -- it appears to hold a
# run of invisible characters copied from the page; confirm before editing.
_STRIP_RUN = '​​​​​'

# Compiled once up front instead of being looked up again for every row.
_INNER_ANY = re.compile(r'>(.*)<')
_INNER_TD = re.compile(r'td>(.*)</td>')
_BRACKETED = re.compile(r'\[(.*)]')


def _first_capture(pattern, markup):
    """Return the first group *pattern* captures in *markup*, minus the strip run.

    *markup* is converted with ``str()`` first, so it may be a tag object or a
    plain string. Raises ``IndexError`` when the pattern does not match.
    """
    return pattern.findall(str(markup))[0].replace(_STRIP_RUN, '')


# First entry is the header row; one list per parsed table row follows.
data = [['编号', '名称', '属性', '工作', '食量', '掉落']]
for row in tr_lst:
    # Look the cells up once per row rather than once per extracted field.
    cells = row.find_all('td')

    # Number and name: the first cell holds "<number>-<name>".
    id_name = _first_capture(_INNER_ANY, cells[0])
    # maxsplit=1 keeps a name that itself contains '-' in one piece; indexing
    # (rather than unpacking) still raises IndexError when there is no '-'.
    id_name_parts = id_name.split('-', 1)
    pal_id = id_name_parts[0]  # not called `id`, which would shadow the builtin
    name = id_name_parts[1]

    # Attributes: third cell, one "[...]" entry per <br/>-separated line.
    attributes = _first_capture(_INNER_TD, cells[2])
    attribute_data = [
        _first_capture(_BRACKETED, entry)
        for entry in attributes.split('<br/>')
    ]

    # Work: sixth cell, <br/>-separated.
    work_lst = _first_capture(_INNER_ANY, cells[5]).split('<br/>')

    # Food amount: seventh cell, used as-is.
    eat = _first_capture(_INNER_ANY, cells[6])

    # Drops: ninth cell, <br/>-separated.
    fall_lst = _first_capture(_INNER_TD, cells[8]).split('<br/>')

    # Collect the record; multi-valued fields are flattened with '#'.
    data.append([
        pal_id,
        name,
        '#'.join(attribute_data),
        '#'.join(work_lst),
        eat,
        '#'.join(fall_lst),
    ])

# Dump every collected record (header row included), one per line.
for record in data:
    print(record)

# Optional export to an Excel workbook (disabled):
# workbook = openpyxl.Workbook()
# sheet = workbook.worksheets[0]
# for item in data:
#     sheet.append(item)
# workbook.save('帕鲁数据.xlsx')

# Optional JSON serialisation of the records (disabled):
# data_json = json.dumps(data, ensure_ascii=False)
# print(data_json)

# Release the HTTP response once all output has been produced.
resp.close()
